import csv
import re
import urllib.request as urlreq

def getElement(page_str: str, startText: str, endText: str) -> str:
    """Return the text between the first ``startText`` and the next ``endText``.

    The search for ``endText`` begins immediately after the end of the
    matched ``startText``, so the start marker itself is never mistaken for
    the terminator.

    Args:
        page_str: Text to search.
        startText: Marker that precedes the wanted text.
        endText: Marker that follows the wanted text.

    Returns:
        The substring strictly between the two markers, or an empty string
        when either marker cannot be found.
    """
    marker_pos = page_str.find(startText)
    if marker_pos == -1:
        # str.find signals "not found" with -1; slicing with that value would
        # silently return an unrelated piece of the page.
        return ""

    content_start = marker_pos + len(startText)
    content_end = page_str.find(endText, content_start)
    if content_end == -1:
        # Without a terminator there is no well-defined element to return.
        return ""

    return page_str[content_start:content_end]

def _fetch_page(url):
    """Download *url* and return the response body decoded to text."""
    with urlreq.urlopen(url) as response:
        # Decode the bytes instead of calling str() on them: str(bytes)
        # produces the "b'...'" repr, with escaped quotes and \xNN sequences
        # in place of the real characters.
        charset = response.headers.get_content_charset() or "utf-8"
        return response.read().decode(charset, errors="replace")


BASE_URL = "http://www.airlinequality.com/Forum/"

# Fetch the index before touching the output file, so a failed download does
# not leave a truncated airlines.csv behind.
mainpage = _fetch_page(BASE_URL + "seats.htm")

# Capture the href value of every anchor tag.  The '|' inside the first
# character class is matched literally; the pattern is kept as it was.
# DOTALL lets '.' cross line breaks now that the page holds real newlines
# rather than the escaped ones of a bytes repr.
links = re.compile(r'''<a\s*href=['|"](.*?)['"].*?>''', re.DOTALL).findall(mainpage)

# newline='' plus an explicit lineterminator keeps "\n" row endings on every
# platform; csv.writer quotes any field that contains a comma or a quote.
with open('airlines.csv', 'w', newline='', encoding='utf-8') as f_out:
    writer = csv.writer(f_out, lineterminator="\n")
    writer.writerow(["Airline", "Rating", "#Reviews"])

    # Only entries 10..620 of the link list are visited.  Slicing simply
    # yields fewer items when the index page has fewer links than that.
    for link in links[10:621]:
        try:
            page = _fetch_page(BASE_URL + link)
        except Exception:
            # Best-effort scrape: skip pages that cannot be downloaded, but
            # let KeyboardInterrupt / SystemExit propagate.
            continue

        # The text after the first ": " in the "fn" span is used as the name.
        name_parts = getElement(page, '<span class="fn">', '</span>').split(": ")
        if len(name_parts) < 2:
            # No "prefix: name" heading on this page; skip it rather than
            # abort the whole run with an IndexError.
            continue
        name = name_parts[1]

        rating = getElement(page, '<span class="average">', '</span>')

        numreviews = getElement(page, '<b><span class="count">', '</span>')

        writer.writerow([name.strip(), rating.strip(), numreviews.strip()])